library(dplyr)
library(leaflet)
library(geosphere)
library(data.table)
library(ggplot2)
library(lubridate)
library(caret)
library(readxl)
library(tidyverse)
library(ggthemes)
library(ggrepel)
library(reshape)
library(reshape2)
library(devtools)
We used all of the bluebike data in 2017
# Importing the all monthly data in 2017
dat_201701<-read.csv("201701-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201702<-read.csv("201702-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201703<-read.csv("201703-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201704<-read.csv("201704-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201705<-read.csv("201705-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201706<-read.csv("201706-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201707<-read.csv("201707-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201708<-read.csv("201708-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201709<-read.csv("201709-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201710<-read.csv("201710-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201711<-read.csv("201711-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201712<-read.csv("201712-hubway-tripdata.csv",stringsAsFactors = FALSE)
# Combine them
dat_2017 <- rbind(dat_201701, dat_201702, dat_201703, dat_201704, dat_201705,
dat_201706, dat_201707, dat_201708, dat_201709, dat_201710,
dat_201711, dat_201712)
## age, age_cat, duration_min, year, month, month_abb, day, hour, wday, weekend
bbike <- dat_2017 %>%
mutate(birth.year = as.numeric(birth.year)) %>%
mutate(age = 2017 - birth.year) %>%
mutate(age_cat = case_when(
.$age >= 10 & .$age < 20 ~ 1,
.$age >= 20 & .$age < 30 ~ 2,
.$age >= 30 & .$age < 40 ~ 3,
.$age >= 40 & .$age < 50 ~ 4,
.$age >= 50 & .$age < 60 ~ 5,
.$age >= 60 & .$age < 70 ~ 6,
.$age >= 70 & .$age < 80 ~ 7,
.$age >= 80 ~ 8)) %>%
mutate(duration_min = tripduration / 60) %>%
mutate(year = year(starttime),
month = month(starttime),
month_abb = month(starttime, label = TRUE, abbr = TRUE),
day = day(starttime),
hour = hour(starttime),
wday = wday(starttime, label = TRUE, abbr = TRUE))
## Warning in evalq(as.numeric(birth.year), <environment>): NAs introduced by
## coercion
## Trip distance (km)
setDT(bbike)[ , dist_km := distGeo(matrix(c(start.station.longitude, start.station.latitude), ncol = 2),matrix(c(end.station.longitude, end.station.latitude), ncol = 2))/1000]
bbike <- as.data.frame(bbike)
## overtime (if duration_min > 45, 1, 0)
bbike <- bbike %>%
mutate(overtime = ifelse(duration_min > 45, 1, 0))
## user_start, user_end: number of users at the start/end station
bbike <- bbike %>%
group_by(start.station.id) %>%
mutate(user_start = n())
bbike <- bbike %>%
group_by(end.station.id) %>%
mutate(user_end = n())
## temp_max, temp_min, rain, snownice(snow or ice)
weather <- read_excel("boston_weather.xls")
bbike <- bbike %>%
group_by(year, month, day) %>%
left_join(., weather, by = c("year", "month", "day"))
crash <- read.csv("crash_sept.csv", stringsAsFactors = FALSE)
crash <- crash %>%
mutate(year = year(dispatch_ts), month = month(dispatch_ts), day = day(dispatch_ts))
temp <- crash %>%
filter(year == 2017) %>%
group_by(year, month, day, mode_type) %>%
summarise(crash = n())
crash_wide <- temp %>%
spread(mode_type, crash)
bbike_crash <- left_join(bbike, crash_wide, by = c("year", "month", "day"))
The number of bluelike users has increased over several years. Among subscribers, if they pay $99 per year, they can use it unlimitedly. However, there is a time limit, for 45 minutes per once. We are going to figure out when they do not return their bike within 45 minutes and predict the pattern.
Membership and Ridership More than 8 million trips have been taken by Bluebikes riders since the 2011 launch (as of 12/2018) An estimated 87,000 unique riders took trips in 2016
bbike_summary <- read_excel("bluebike_summary.xlsx")
plot <- melt(bbike_summary, id.vars = "year") %>%
filter(variable == "subscriber" | variable == "customer")
plot %>%
ggplot(aes(year, value)) +
geom_bar(aes(fill = variable), stat = "identity", position = "stack") +
scale_fill_manual(values = c("#FF8F1C", "#0050B5")) +
scale_x_continuous(breaks = c(2011, 2012, 2013, 2014, 2015, 2016, 2017)) +
xlab("Year") +
ylab("Riders") +
ggtitle("Number of Riders since 2011") +
theme_bw()
bbike_summary %>%
ggplot(aes(year, total_trips)) +
geom_point(color = "#FF8F1C", size = 2) +
geom_line(color = "#1D428A", alpha = 0.7) +
xlab("Year") +
ylab("Numbers") +
ggtitle("Total Number of Trips since 2011") +
theme_bw()
bbike %>%
mutate(gender = as.factor(gender)) %>%
ggplot(aes(age_cat)) +
geom_bar(aes(fill = gender)) +
scale_fill_brewer( palette = "Oranges")
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning: Removed 210947 rows containing non-finite values (stat_count).
# Create data frame
dat_station<- read.csv("Hubway_Stations_as_of_July_2017.csv")
dat_2017_station <- bbike %>%
filter(!birth.year=="\\N")%>%
filter(birth.year>1900 & birth.year<=2017)%>%
group_by(start.station.id) %>% summarize(number = n(), start.station.latitude=first(start.station.latitude), start.station.longitude=first(start.station.longitude),start.station.name=first(start.station.name))%>% filter(start.station.latitude>0)
summary(dat_2017_station)
## start.station.id number start.station.latitude
## Min. : 1.00 Min. : 4 Min. :42.30
## 1st Qu.: 54.75 1st Qu.: 1794 1st Qu.:42.34
## Median :108.50 Median : 4750 Median :42.36
## Mean :111.91 Mean : 5625 Mean :42.36
## 3rd Qu.:173.25 3rd Qu.: 7614 3rd Qu.:42.37
## Max. :232.00 Max. :35702 Max. :42.41
## start.station.longitude start.station.name
## Min. :-71.17 Length:196
## 1st Qu.:-71.11 Class :character
## Median :-71.08 Mode :character
## Mean :-71.09
## 3rd Qu.:-71.06
## Max. :-71.01
# Distinguish stations by color based on the number of users
getColor <- function(df){
sapply(df$number, function(number) {
if(number %in% 1:4000) {
"green"
} else if(number %in% 4001:10000) {
"orange"
} else if(number >10000) {
"red"
} else {
"blue"
} })
}
icons <- awesomeIcons(
icon = 'ios-close',
iconColor = 'black',
library = 'ion',
markerColor = getColor(dat_2017_station)
)
leaflet(dat_2017_station) %>% addTiles() %>%
addAwesomeMarkers(~start.station.longitude
, ~start.station.latitude, icon=icons, label=~as.character(number), popup = ~start.station.name)
bbike_member <- bbike %>%
filter(usertype == "Subscriber")
summary(bbike_member$duration_min)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.02 6.10 9.78 13.31 15.48 61276.80
As you can see, the data seems to have wrong information. The very long tripduration might be attributable to lost or other errors. Therefore, we limit the range from 0 to 75 minutes for the duration in this study.
bbike_member <- bbike_member %>%
filter(duration_min < 50)
bbike_member %>%
mutate(group = ifelse(rain == 0, "no rain", "rain")) %>%
ggplot(aes(duration_min, y = ..count.., fill = group)) +
geom_density(alpha = 0.2) +
xlab("Trip Duration (min)") +
ylab("Riders")
bbike_member %>%
mutate(gender = as.factor(gender)) %>%
ggplot(aes(duration_min, y = ..count.., fill = gender)) +
geom_density(alpha = 0.2) +
xlab("Trip Duration (min)") +
ylab("Riders")
# Distance
bbike %>%
filter(birth.year > 1900 & birth.year <= 2017)%>%
group_by(age_cat) %>%
summarize(avg = mean(dist_km), se = sd(dist_km) / sqrt(n())) %>%
ggplot(aes(age_cat, avg))+
geom_boxplot(aes(ymin = avg - 2*se, ymax = avg+2 * se))+
geom_point(color = "#FF8F1C")+
geom_line(color = "#1D428A")+
scale_x_continuous(breaks=(c(1,2,3,4,5,6,7,8)), labels=c("10-20","20-30","30-40","40-50","50-60","60-70","70-80","80-"))+
xlab(expression(paste(Age, " (years)")))+
ylab(expression(paste(Distance," (km)"))) +
theme_bw()
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?
There is no consistent trend between age and trip distance.
bbikemember <- bbike %>%
ungroup() %>%
filter(usertype == "Subscriber" & duration_min < 50) %>%
mutate(gender = as.factor(gender), day = as.factor(day),
hour = as.factor(hour)) %>%
mutate(rain_cat = ifelse(rain == 0, 0, 1)) %>%
mutate(snownice_cat = ifelse(snownice == 0, 0, 1)) %>%
mutate(overtime = ifelse(duration_min < 15, 0, 1)) %>%
mutate(satsun = ifelse(wday %in% c("Sat", "Sun"), 1, 0))
set.seed(1)
library(caret)
Train <- createDataPartition(bbikemember$overtime, p=0.5, list=FALSE)
training <- bbikemember[ Train, ]
testing <- bbikemember[ -Train, ]
bbikemember$overtime = as.factor(as.numeric(as.character(bbikemember $overtime)))
#overall accuracy
p = 0.358 #290235/810623
y_hat <- sample(c("0","1"), length(testing), replace = TRUE, prob=c(p, 1-p)) %>%
factor(levels = levels(bbikemember$overtime))
mean(y_hat == testing$overtime)
## Warning in `==.default`(y_hat, testing$overtime): longer object length is
## not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
## [1] 0.3973021
#logistic regression
glm.fit <- bbikemember %>%
glm(overtime ~ gender + age + month + satsun + rain_cat +
snownice_cat + user_start + user_end, data=., family = "binomial")
summary(glm.fit)
##
## Call:
## glm(formula = overtime ~ gender + age + month + satsun + rain_cat +
## snownice_cat + user_start + user_end, family = "binomial",
## data = .)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4899 -0.8238 -0.7000 1.3422 2.7176
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.801e-03 3.240e-02 -0.179 0.858
## gender1 -7.352e-01 3.055e-02 -24.061 < 2e-16 ***
## gender2 -4.139e-01 3.074e-02 -13.466 < 2e-16 ***
## age 6.726e-03 1.918e-04 35.063 < 2e-16 ***
## month 6.172e-03 8.827e-04 6.992 2.7e-12 ***
## satsun 2.279e-01 5.587e-03 40.793 < 2e-16 ***
## rain_cat -8.377e-02 4.849e-03 -17.276 < 2e-16 ***
## snownice_cat -5.960e-01 2.921e-02 -20.408 < 2e-16 ***
## user_start -2.905e-05 2.967e-07 -97.903 < 2e-16 ***
## user_end -2.728e-05 2.760e-07 -98.837 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1256483 on 1094867 degrees of freedom
## Residual deviance: 1220356 on 1094858 degrees of freedom
## (1855 observations deleted due to missingness)
## AIC: 1220376
##
## Number of Fisher Scoring iterations: 4
#wday.L = Sunday wday.Q = Mondaywday.C = Tuesdaywday^4 = Wednesday wday^5 = Thursday wday^6 = Friday Intercept = Saturday
#prediction
p_hat <- predict(glm.fit, newdata=testing,type="response")
y_hat <- ifelse(p_hat > 0.5, 1, 0) %>% factor()
#confusion matrix
table(predicted = y_hat, actual = testing$overtime)
## actual
## predicted 0 1
## 0 404191 142443
## 1 441 353
confusionMatrix(data = factor(y_hat), reference = factor(testing$overtime))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 404191 142443
## 1 441 353
##
## Accuracy : 0.739
## 95% CI : (0.7378, 0.7402)
## No Information Rate : 0.7392
## P-Value [Acc > NIR] : 0.6074
##
## Kappa : 0.002
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.998910
## Specificity : 0.002472
## Pos Pred Value : 0.739418
## Neg Pred Value : 0.444584
## Prevalence : 0.739151
## Detection Rate : 0.738345
## Detection Prevalence : 0.998550
## Balanced Accuracy : 0.500691
##
## 'Positive' Class : 0
##
library(purrr)
library(caret)
library(ggplot2)
probs <- seq(0, 1, length.out = 10)
guessing <- map_df(probs, function(p){
y_hat <-
sample(c("0", "1"), length(testing), replace = TRUE, prob=c(p, 1-p)) %>%
factor(levels = levels(bbikemember$overtime))
list(method = "Guessing",
FPR = 1 - specificity(y_hat, bbikemember$overtime),
TPR = sensitivity(y_hat, bbikemember$overtime))
})
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
guessing %>% qplot(FPR, TPR, data =., xlab = "1 - Specificity", ylab = "Sensitivity")
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
png(file = "decision_tree.png")
output_tree <- ctree(overtime ~ gender + age + factor(rain_cat),
data = bbikemember)
plot(output_tree)
dev.off()
## quartz_off_screen
## 2